Lab 5 Extra: Interactive Graphs and Animations from the COVID-19 reporting data

More information related to this lab can be found here.

Challenge Exercises

Students in the 697 class need to complete the following challenges.

Challenge Exercise 1

Print a graph to a png file using 3*ppi for the height and width and display the png file in the report using the above R Markdown format.

library(tidyverse)
## ── Attaching packages ──────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0     ✓ purrr   0.3.4
## ✓ tibble  3.0.1     ✓ dplyr   0.8.4
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.4.0
## ── Conflicts ─────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
time_series_confirmed <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")) %>%
  rename(Province_State = "Province/State", Country_Region = "Country/Region") %>%
  mutate(report_type = "confirmed_cases") %>%
  select(report_type, everything())
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
head(time_series_confirmed)
## # A tibble: 6 x 254
##   report_type Province_State Country_Region   Lat   Long `1/22/20` `1/23/20`
##   <chr>       <chr>          <chr>          <dbl>  <dbl>     <dbl>     <dbl>
## 1 confirmed_… <NA>           Afghanistan     33.9  67.7          0         0
## 2 confirmed_… <NA>           Albania         41.2  20.2          0         0
## 3 confirmed_… <NA>           Algeria         28.0   1.66         0         0
## 4 confirmed_… <NA>           Andorra         42.5   1.52         0         0
## 5 confirmed_… <NA>           Angola         -11.2  17.9          0         0
## 6 confirmed_… <NA>           Antigua and B…  17.1 -61.8          0         0
## # … with 247 more variables: `1/24/20` <dbl>, `1/25/20` <dbl>, `1/26/20` <dbl>,
## #   `1/27/20` <dbl>, `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>,
## #   `1/31/20` <dbl>, `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>,
## #   `2/4/20` <dbl>, `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>,
## #   `2/8/20` <dbl>, `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>,
## #   `2/12/20` <dbl>, `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>,
## #   `2/16/20` <dbl>, `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>,
## #   `2/20/20` <dbl>, `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>,
## #   `2/24/20` <dbl>, `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>,
## #   `2/28/20` <dbl>, `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>,
## #   `3/3/20` <dbl>, `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>,
## #   `3/7/20` <dbl>, `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>,
## #   `3/11/20` <dbl>, `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>,
## #   `3/15/20` <dbl>, `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>,
## #   `3/19/20` <dbl>, `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>,
## #   `3/23/20` <dbl>, `3/24/20` <dbl>, `3/25/20` <dbl>, `3/26/20` <dbl>,
## #   `3/27/20` <dbl>, `3/28/20` <dbl>, `3/29/20` <dbl>, `3/30/20` <dbl>,
## #   `3/31/20` <dbl>, `4/1/20` <dbl>, `4/2/20` <dbl>, `4/3/20` <dbl>,
## #   `4/4/20` <dbl>, `4/5/20` <dbl>, `4/6/20` <dbl>, `4/7/20` <dbl>,
## #   `4/8/20` <dbl>, `4/9/20` <dbl>, `4/10/20` <dbl>, `4/11/20` <dbl>,
## #   `4/12/20` <dbl>, `4/13/20` <dbl>, `4/14/20` <dbl>, `4/15/20` <dbl>,
## #   `4/16/20` <dbl>, `4/17/20` <dbl>, `4/18/20` <dbl>, `4/19/20` <dbl>,
## #   `4/20/20` <dbl>, `4/21/20` <dbl>, `4/22/20` <dbl>, `4/23/20` <dbl>,
## #   `4/24/20` <dbl>, `4/25/20` <dbl>, `4/26/20` <dbl>, `4/27/20` <dbl>,
## #   `4/28/20` <dbl>, `4/29/20` <dbl>, `4/30/20` <dbl>, `5/1/20` <dbl>,
## #   `5/2/20` <dbl>, …
library(tidyverse)

time_series_deaths <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv")) %>%
  rename(Province_State = "Province/State", Country_Region = "Country/Region") %>%
  mutate(report_type = "confirmed_deaths") %>%
  select(report_type, everything())
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
head(time_series_deaths)
## # A tibble: 6 x 254
##   report_type Province_State Country_Region   Lat   Long `1/22/20` `1/23/20`
##   <chr>       <chr>          <chr>          <dbl>  <dbl>     <dbl>     <dbl>
## 1 confirmed_… <NA>           Afghanistan     33.9  67.7          0         0
## 2 confirmed_… <NA>           Albania         41.2  20.2          0         0
## 3 confirmed_… <NA>           Algeria         28.0   1.66         0         0
## 4 confirmed_… <NA>           Andorra         42.5   1.52         0         0
## 5 confirmed_… <NA>           Angola         -11.2  17.9          0         0
## 6 confirmed_… <NA>           Antigua and B…  17.1 -61.8          0         0
## # … with 247 more variables: `1/24/20` <dbl>, `1/25/20` <dbl>, `1/26/20` <dbl>,
## #   `1/27/20` <dbl>, `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>,
## #   `1/31/20` <dbl>, `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>,
## #   `2/4/20` <dbl>, `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>,
## #   `2/8/20` <dbl>, `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>,
## #   `2/12/20` <dbl>, `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>,
## #   `2/16/20` <dbl>, `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>,
## #   `2/20/20` <dbl>, `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>,
## #   `2/24/20` <dbl>, `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>,
## #   `2/28/20` <dbl>, `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>,
## #   `3/3/20` <dbl>, `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>,
## #   `3/7/20` <dbl>, `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>,
## #   `3/11/20` <dbl>, `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>,
## #   `3/15/20` <dbl>, `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>,
## #   `3/19/20` <dbl>, `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>,
## #   `3/23/20` <dbl>, `3/24/20` <dbl>, `3/25/20` <dbl>, `3/26/20` <dbl>,
## #   `3/27/20` <dbl>, `3/28/20` <dbl>, `3/29/20` <dbl>, `3/30/20` <dbl>,
## #   `3/31/20` <dbl>, `4/1/20` <dbl>, `4/2/20` <dbl>, `4/3/20` <dbl>,
## #   `4/4/20` <dbl>, `4/5/20` <dbl>, `4/6/20` <dbl>, `4/7/20` <dbl>,
## #   `4/8/20` <dbl>, `4/9/20` <dbl>, `4/10/20` <dbl>, `4/11/20` <dbl>,
## #   `4/12/20` <dbl>, `4/13/20` <dbl>, `4/14/20` <dbl>, `4/15/20` <dbl>,
## #   `4/16/20` <dbl>, `4/17/20` <dbl>, `4/18/20` <dbl>, `4/19/20` <dbl>,
## #   `4/20/20` <dbl>, `4/21/20` <dbl>, `4/22/20` <dbl>, `4/23/20` <dbl>,
## #   `4/24/20` <dbl>, `4/25/20` <dbl>, `4/26/20` <dbl>, `4/27/20` <dbl>,
## #   `4/28/20` <dbl>, `4/29/20` <dbl>, `4/30/20` <dbl>, `5/1/20` <dbl>,
## #   `5/2/20` <dbl>, …
library(tidyverse)

time_series_joined <- bind_rows(time_series_confirmed, time_series_deaths) %>%
  gather(key="date", value="count", 6:length(.)) %>%
  mutate(date=lubridate::mdy(date)) %>%
  spread(key="report_type", value="count") %>%
  mutate(confirmed_cases=replace_na(confirmed_cases, 0),
         confirmed_deaths=replace_na(confirmed_deaths, 0))

head(time_series_joined)
## # A tibble: 6 x 7
##   Province_State Country_Region   Lat   Long date       confirmed_cases
##   <chr>          <chr>          <dbl>  <dbl> <date>               <dbl>
## 1 <NA>           Afghanistan     33.9  67.7  2020-01-22               0
## 2 <NA>           Albania         41.2  20.2  2020-01-22               0
## 3 <NA>           Algeria         28.0   1.66 2020-01-22               0
## 4 <NA>           Andorra         42.5   1.52 2020-01-22               0
## 5 <NA>           Angola         -11.2  17.9  2020-01-22               0
## 6 <NA>           Antigua and B…  17.1 -61.8  2020-01-22               0
## # … with 1 more variable: confirmed_deaths <dbl>
tail(time_series_joined)
## # A tibble: 6 x 7
##   Province_State Country_Region   Lat  Long date       confirmed_cases
##   <chr>          <chr>          <dbl> <dbl> <date>               <dbl>
## 1 <NA>           Syria           34.8  39.0 2020-09-26               0
## 2 <NA>           United Arab E…  23.4  53.8 2020-09-26               0
## 3 Montserrat     United Kingdom  16.7 -62.2 2020-09-26               0
## 4 Turks and Cai… United Kingdom  21.7 -71.8 2020-09-26               0
## 5 <NA>           Vietnam         14.1 108.  2020-09-26               0
## 6 <NA>           Yemen           15.6  48.5 2020-09-26               0
## # … with 1 more variable: confirmed_deaths <dbl>
library(tidyverse)

time_series_sum <- time_series_joined %>%
  group_by(date) %>%
  summarize(cases = sum(confirmed_cases),
            deaths = sum(confirmed_deaths)) %>%
  mutate(deaths_cases = deaths/cases)

head(time_series_sum)
## # A tibble: 6 x 4
##   date       cases deaths deaths_cases
##   <date>     <dbl>  <dbl>        <dbl>
## 1 2020-01-22   555     17       0.0306
## 2 2020-01-23   654     18       0.0275
## 3 2020-01-24   941     26       0.0276
## 4 2020-01-25  1434     42       0.0293
## 5 2020-01-26  2118     56       0.0264
## 6 2020-01-27  2927     82       0.0280
library(tidyverse)

time_series_sum <- time_series_joined %>%
  filter(Country_Region=="US") %>%                      #| Filter only US Data
  group_by(date) %>%
  summarize(cases = sum(confirmed_cases),
            deaths = sum(confirmed_deaths)) %>%
  mutate(deaths_cases = deaths/cases)
ppi=300

library(ggplot2)

p1 <- ggplot(time_series_sum, aes(x=date, y=deaths_cases)) +
  geom_line() +
  labs(x="", y="Deaths/Cases", title="Fatality Rate in the U.S.") +
  theme_linedraw() +
  theme(plot.title=element_text(face="bold", hjust=0.5),
        panel.grid.major=element_blank(),
        panel.grid.minor=element_blank())

png("/Users/greg/Box/courses/umass/fall-2020/evolutionary-genomics-and-bioinformatics/evo-geno-course/images/fatality-rate-in-the-usa-plot.png", 
    width=3*ppi, height=3*ppi, res=ppi)

p1

dev.off()
## quartz_off_screen 
##                 2

Challenge Exercise 2

Turn one of the exercises from Lab 5 into an interactive graph with plotyly.

library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
p1 <- ggplot(time_series_sum, aes(x=date, y=deaths_cases)) +
  geom_line() +
  labs(x="", y="Deaths/Cases", title="Fatality Rate in the U.S.") +
  theme_linedraw() +
  theme(plot.title=element_text(face="bold", hjust=0.5),
        panel.grid.major=element_blank(),
        panel.grid.minor=element_blank())

ggplotly(p1)

Challenge Exercise 3

Create an animated graph of your choosing using the time series data to display an aspect (e.g. states or countries) of the data that is important to you.

library(tidyverse)

time_series_confirmed <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv")) %>%
  select(7:length(.)) %>%
  gather(key="date", value="cases", 6:length(.)) %>%
  mutate(date = lubridate::mdy(date)) %>%
  group_by(date, Province_State) %>%
  summarize(total_cases = sum(cases)) %>%
  filter(Province_State %in% c("Pennsylvania", "Washington", "Massachusetts"))
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   iso2 = col_character(),
##   iso3 = col_character(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Combined_Key = col_character()
## )
## See spec(...) for full column specifications.
head(time_series_confirmed)
## # A tibble: 6 x 3
## # Groups:   date [2]
##   date       Province_State total_cases
##   <date>     <chr>                <dbl>
## 1 2020-01-22 Massachusetts            0
## 2 2020-01-22 Pennsylvania             0
## 3 2020-01-22 Washington               1
## 4 2020-01-23 Massachusetts            0
## 5 2020-01-23 Pennsylvania             0
## 6 2020-01-23 Washington               1
library(ggplot2)
library(gganimate)
library(transformr)
library(gifski)

p1 <- ggplot(time_series_confirmed, aes(x=date, y=total_cases)) +
  labs(x="", y="confirmed cases", title="COVID-19 Cases by State") +
  geom_line(aes(color=Province_State)) +
  theme_linedraw() +
  theme(plot.title=element_text(face="bold", hjust=0.5),
        panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        legend.position="bottom",
        legend.title=element_blank()) +
  transition_reveal(date)

animate(p1, renderer = gifski_renderer(), end_pause = 15)